{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "402d0e02",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import geopandas as gpd\n",
    "import os, time, json\n",
    "import pyreadstat\n",
    "\n",
    "os.chdir(\"/Users/xiaosongw/Dropbox/Research/InformedSources/Replication/Build/\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8887a32a",
   "metadata": {},
   "source": [
    "# 2017 data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4e655d07",
   "metadata": {},
   "outputs": [],
   "source": [
    "df17, meta17 = pyreadstat.read_sav(\n",
    "    \"./Input/ACAPMA/ORD-288245-B5P3 Petrol and Convenient Express Stores II Final Data_14Aug/ORD-288245-B5P3 Petrol and Convenient Express Stores II Final Data_14Aug.sav\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5d23fbc7",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "df17_out = df17.copy()\n",
    "df17_out['location'] = df17_out['Q3'].map(meta17.variable_value_labels['Q3'])\n",
    "df17_out['state'] = df17_out['STATE'].map(meta17.variable_value_labels['STATE'])\n",
    "df17_out['area'] = df17_out['AREA'].map(meta17.variable_value_labels['AREA'])\n",
    "df17_out['gender'] = df17_out['Q5'].map(meta17.variable_value_labels['Q5'])\n",
    "df17_out['age'] = df17_out['Q6'].map(meta17.variable_value_labels['Q6'])\n",
    "df17_out['hh_inc'] = df17_out['Q7'].map(meta17.variable_value_labels['Q7'])\n",
    "df17_out['visit_freq'] = df17_out['Q10'].map(meta17.variable_value_labels['Q10'])\n",
    "df17_out['Q13_10OTH'] = df17_out['Q13_10OTH'].fillna('')\n",
    "df17_out.loc[df17_out['Q13_10OTH'].str.lower().str.contains('wool'), 'Q13'] = 4\n",
    "df17_out.loc[df17_out['Q13_10OTH'].str.lower()=='bp', 'Q13'] = 1\n",
    "df17_out['brand'] = df17_out['Q13'].map(meta17.variable_value_labels['Q13'])\n",
    "df17_out['why'] = df17_out['Q14'].map(meta17.variable_value_labels['Q14'])\n",
    "df17_out['dist_search'] = df17_out['Q20'].map(meta17.variable_value_labels['Q20'])\n",
    "df17_out['postcode'] = df17_out['Q4']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "82b61e2a",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "df17_out['bn'] = df17_out['brand'].copy()\n",
    "df17_out.loc[df17_out['brand']=='petrol', 'bn'] = 'woolworths'\n",
    "df17_out.loc[df17_out['brand']=='sevenEleven', 'bn'] = '7-11'\n",
    "df17_out.loc[~df17_out['bn'].isin(['bp', 'caltex', 'coles', 'woolworths', '7-11']), 'bn'] = 'other'\n",
    "df17_out.loc[df17_out['brand'].isnull(), 'bn'] = 'N/A'\n",
    "df17_out['year'] = 2017"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b81fbd4f",
   "metadata": {},
   "source": [
    "# 2015 data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cdfb594b",
   "metadata": {},
   "outputs": [],
   "source": [
    "df15 = pd.read_excel(\"./Input/ACAPMA/2015 National Monitor of Fuel Consumer Attitudes - Survey Manager Raw Data.xlsx\")\n",
    "df15 = df15.replace(-999, np.nan)\n",
    "\n",
    "d_coln = {'R2':'Where do you live', # melb 3 vic 4\n",
    "          'R3_1':'Postcode', \n",
    "          'R4':'Gener', # 1 male 2 female\n",
    "          'R5':'Age', \n",
    "          'R6':'What type of vehicle do you drive most often?', \n",
    "          'R7':'When purchasing fuel, is it generally for', \n",
    "          'R9_1':'price_rank', 'R9_2':'location_rank', 'R9_3':'convenience_rank', 'R9_4':'brand_rank', \n",
    "          'R9_5':'access_rank', 'R9_6':'safety_rank', 'R9_7':'store_rank', 'R9_8':'service_rank', 'R9_1':'hour_rank', \n",
    "          'R18':'Brand'} # 1 bp 2 caltex 3 coles 4 woolworths 5 7-11 6 united 7 costco 8 puma}   \n",
    "d_r6 = {1:'car', 2:'suv', 3:'truck', 4:'motorcycle'}\n",
    "d_r10 = {1:'search', 2:'supermarket_discount', 3:'corporate_card', 4:'other'}\n",
    "d_r18 = {1:'bp', 2:'Caltex', 3:'Coles', 4:'Woolworths', 5:'7-Eleven', 6:'United', 7:'Costco', 8:'Puma'}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eac8ade1",
   "metadata": {},
   "outputs": [],
   "source": [
    "df15_out = df15.copy()\n",
    "df15_out['location'] = df15_out['R2'].map(meta17.variable_value_labels['Q3'])\n",
    "df15_out['postcode'] = df15_out['R3_1']\n",
    "df15_out['gender'] = df15_out['R4'].map(meta17.variable_value_labels['Q5'])\n",
    "df15_out['age'] = df15_out['R5'].map(meta17.variable_value_labels['Q6'])\n",
    "df15_out['R18_9'] = df15_out['R18_9'].fillna('').astype(str)\n",
    "df15_out.loc[df15_out['R18_9'].str.lower().str.contains('wool'), 'R18'] = 4\n",
    "df15_out.loc[df15_out['R18_9'].str.lower()=='bp', 'R18'] = 1\n",
    "df15_out.loc[df15_out['R18_9'].str.lower()=='coles', 'R18'] = 3\n",
    "df15_out['brand'] = df15_out['R18'].map(meta17.variable_value_labels['Q13'])\n",
    "df15_out['why_price'] = (df15_out['R19_1']==1).astype(int) # / df15[[i for i in df15.columns if 'R19' in i]].sum(axis=1)\n",
    "df15_out['why_prog'] = (df15_out['R19_3']==1).astype(int) # / df15[[i for i in df15.columns if 'R19' in i]].sum(axis=1)\n",
    "df15_out['why_close'] = (df15_out['R19_8']==1).astype(int) # / df15[[i for i in df15.columns if 'R19' in i]].sum(axis=1)\n",
    "# df15_out['dist_search'] = df15_out['Q20'].map(meta15.variable_value_labels['Q20'])\n",
    "# df15_out['postcode'] = df15_out['Q4']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cf023c42",
   "metadata": {},
   "outputs": [],
   "source": [
    "df15_out['bn'] = df15_out['brand'].copy()\n",
    "df15_out.loc[df15_out['brand']=='petrol', 'bn'] = 'woolworths'\n",
    "df15_out.loc[df15_out['brand']=='sevenEleven', 'bn'] = '7-11'\n",
    "df15_out.loc[~df15_out['bn'].isin(['bp', 'caltex', 'coles', 'woolworths', '7-11']), 'bn'] = 'other'\n",
    "df15_out.loc[df15_out['brand'].isnull(), 'bn'] = 'N/A'\n",
    "df15_out['year'] = 2015"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e515ea7b",
   "metadata": {},
   "outputs": [],
   "source": [
    "print('{} responses from vic'.format(df15[df15['R2'].isin([3,4])].shape[0]))\n",
    "print('{} responses from melbourne'.format(df15[df15['R2']==3].shape[0]))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "507991ae",
   "metadata": {},
   "source": [
    "# 2019"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f17cdd37",
   "metadata": {},
   "outputs": [],
   "source": [
    "df19, meta19 = pyreadstat.read_sav(\n",
    "    './Input/ACAPMA/ORD-417715-F6Q4 - Petrol and Convenient Express Stores 3_Final Data_25Jun/ORD-417715-F6Q4 - Petrol and Convenient Express Stores 3_Final Data_25Jun.sav')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fbca10ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "df19_out = df19.copy()\n",
    "df19_out['location'] = df19_out['Q3'].map(meta19.variable_value_labels['Q3'])\n",
    "df19_out['state'] = df19_out['HIDSTATE1'].map(meta19.variable_value_labels['HIDSTATE1'])\n",
    "df19_out['area'] = df19_out['AREA'].map(meta19.variable_value_labels['AREA'])\n",
    "df19_out['gender'] = df19_out['Q6'].map(meta19.variable_value_labels['Q6'])\n",
    "df19_out['age'] = df19_out['Q7'].map(meta19.variable_value_labels['Q7'])\n",
    "df19_out['hh_inc'] = df19_out['Q8'].map(meta19.variable_value_labels['Q8'])\n",
    "df19_out['visit_freq'] = df19_out['Q10'].map(meta19.variable_value_labels['Q10'])\n",
    "df19_out['Q17_12_OTHER'] = df19_out['Q17_12_OTHER'].fillna('')\n",
    "df19_out.loc[df19_out['Q17_12_OTHER'].str.lower().str.contains('wool'), 'Q17'] = 4\n",
    "df19_out.loc[df19_out['Q17_12_OTHER'].str.lower()=='bp', 'Q17'] = 1\n",
    "df19_out['brand'] = df19_out['Q17'].map(meta19.variable_value_labels['Q17'])\n",
    "df19_out['brand'] = df19_out['brand'].str.lower()\n",
    "df19_out['why'] = df19_out['Q18'].map(meta19.variable_value_labels['Q18'])\n",
    "df19_out['dist_search'] = df19_out['Q20'].map(meta19.variable_value_labels['Q20'])\n",
    "df19_out['postcode'] = df19_out['Q5']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "940dece3",
   "metadata": {},
   "outputs": [],
   "source": [
    "df19_out['bn'] = df19_out['brand'].copy()\n",
    "df19_out.loc[df19_out['brand']=='petrol', 'bn'] = 'woolworths'\n",
    "df19_out.loc[df19_out['brand']=='7/11', 'bn'] = '7-11'\n",
    "df19_out.loc[df19_out['brand']=='shell coles express', 'bn'] = 'coles'\n",
    "df19_out.loc[~df19_out['bn'].isin(['bp', 'caltex', 'coles', 'woolworths', '7-11']), 'bn'] = 'other'\n",
    "df19_out.loc[df19_out['brand'].isnull(), 'bn'] = 'N/A'\n",
    "df19_out['year'] = 2019"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63c2c16f",
   "metadata": {},
   "outputs": [],
   "source": [
    "(df19_out['bn'].value_counts()/df19_out['bn'].value_counts().sum()).round(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4e50c9b7",
   "metadata": {},
   "source": [
    "# Summary\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5a96cafc",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "df15_out['state'] = ''\n",
    "df15_out.loc[df15_out['location'].isin(['Sydney', 'Other NSW']), 'state'] = 'NSW'\n",
    "df15_out.loc[df15_out['location'].isin(['Melbourne', 'Other VIC']), 'state'] = 'VIC'\n",
    "df15_out.loc[df15_out['location'].isin(['Brisbane', 'Other QLD']), 'state'] = 'QLD'\n",
    "df15_out.loc[df15_out['location'].isin(['Perth', 'Other WA']), 'state'] = 'WA'\n",
    "df15_out.loc[df15_out['location'].isin(['Adelaide', 'Other SA']), 'state'] = 'SA'\n",
    "df15_out.loc[df15_out['location'].isin(['Hobart', 'Other TAS']), 'state'] = 'TAS'\n",
    "df15_out.loc[df15_out['location'].isin(['NT']), 'state'] = 'NT'\n",
    "df15_out.loc[df15_out['location'].isin(['ACT']), 'state'] = 'ACT'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a655b017",
   "metadata": {},
   "outputs": [],
   "source": [
    "df17_out['state'] = ''\n",
    "df17_out.loc[df17_out['location'].isin(['Sydney', 'Other NSW']), 'state'] = 'NSW'\n",
    "df17_out.loc[df17_out['location'].isin(['Melbourne', 'Other VIC']), 'state'] = 'VIC'\n",
    "df17_out.loc[df17_out['location'].isin(['Brisbane', 'Other QLD']), 'state'] = 'QLD'\n",
    "df17_out.loc[df17_out['location'].isin(['Perth', 'Other WA']), 'state'] = 'WA'\n",
    "df17_out.loc[df17_out['location'].isin(['Adelaide', 'Other SA']), 'state'] = 'SA'\n",
    "df17_out.loc[df17_out['location'].isin(['Hobart', 'Other TAS']), 'state'] = 'TAS'\n",
    "df17_out.loc[df17_out['location'].isin(['NT']), 'state'] = 'NT'\n",
    "df17_out.loc[df17_out['location'].isin(['ACT']), 'state'] = 'ACT'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "02f21162",
   "metadata": {},
   "outputs": [],
   "source": [
    "df19_out['ST'] = df19_out['state'].map({'Queensland':\"QLD\", 'New South Wales':'NSW', 'Victoria':'VIC', \n",
    "                                        'South Australia':'SA', 'Western Australia':'WA', \n",
    "                                        'Australia Capital Territory':'ACT', 'Tasmania':'TAS',\n",
    "                                        'Northern Territory':'NT'})"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7338ff35",
   "metadata": {},
   "source": [
    "# margin of errors - Melbourne"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7bf27308",
   "metadata": {},
   "outputs": [],
   "source": [
    "tab_bn_mel_cf = pd.DataFrame(\n",
    "    {'2015':df15_out.loc[(df15_out['location']=='Melbourne')&(df15_out['brand'].notnull()), \n",
    "                          'bn'].value_counts(normalize=True), \n",
    "     '2017':df17_out.loc[(df17_out['location']=='Melbourne')&(df17_out['brand'].notnull()), \n",
    "                          'bn'].value_counts(normalize=True), \n",
    "     '2019':df19_out.loc[(df19_out['location']=='Melbourne')&(df19_out['brand'].notnull()), \n",
    "                          'bn'].value_counts(normalize=True)})\n",
    "tab_bn_mel_cf = tab_bn_mel_cf.loc[['bp', 'caltex', 'coles', 'woolworths', '7-11', 'other']]\n",
    "tab_bn_mel_cf.index = ['BP', 'Caltex', 'Coles', 'Woolworths', '7-Eleven', 'Other']\n",
    "\n",
    "l_n = [df15_out.loc[(df15_out['location']=='Melbourne')&(df15_out['brand'].notnull())].shape[0],\n",
    "       df17_out.loc[(df17_out['location']=='Melbourne')&(df17_out['brand'].notnull())].shape[0],\n",
    "       df19_out.loc[(df19_out['location']=='Melbourne')&(df19_out['brand'].notnull())].shape[0]]\n",
    "\n",
    "tab_bn_mel_me = pd.DataFrame(index=tab_bn_mel_cf.index)\n",
    "l_y = ['2015', '2017', '2019']\n",
    "for i in range(3):\n",
    "    iy = l_y[i]\n",
    "    tab_bn_mel_me[iy] = np.sqrt(tab_bn_mel_cf[iy]*(1-tab_bn_mel_cf[iy])/l_n[i])\n",
    "\n",
    "tab_bn_mel_cf = tab_bn_mel_cf.map(\"{:.3f}\".format)\n",
    "tab_bn_mel_me = tab_bn_mel_me.map(\"({:.3f})\".format)\n",
    "tab_bn_mel_cf['r'] = 1\n",
    "tab_bn_mel_me['r'] = 2\n",
    "tab_bn_mel_cfme = pd.concat([tab_bn_mel_cf, \n",
    "                             tab_bn_mel_me], axis=0).reset_index()\n",
    "tab_bn_mel_cfme = tab_bn_mel_cfme.groupby(['index', 'r']).first().loc[['BP', 'Caltex', 'Coles', 'Woolworths', '7-Eleven', 'Other']]\n",
    "tab_bn_mel_cfme.index = ['BP', '', 'Caltex', '', 'Coles', '', 'Woolworths', '', '7-Eleven', '', 'Other', '']\n",
    "tab_bn_mel_cfme.loc['Number of responses'] = [str(int(i)) for i in l_n]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e9fbd5d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "v15 =  df15_out.loc[(df15_out['location']=='Melbourne')&(df15_out['brand'].notnull()), \n",
    "                          'bn'].value_counts(normalize=True).loc[['bp', 'caltex', 'coles', 'woolworths', '7-11']].values\n",
    "v17 = df17_out.loc[(df17_out['location']=='Melbourne')&(df17_out['brand'].notnull()), \n",
    "                          'bn'].value_counts(normalize=True).loc[['bp', 'caltex', 'coles', 'woolworths', '7-11']].values\n",
    "e15 = np.sqrt(v15 * (1 - v15) / l_n[0])\n",
    "e17 = np.sqrt(v17 * (1 - v17) / l_n[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "433428d9",
   "metadata": {},
   "outputs": [],
   "source": [
    "tex_tab = tab_bn_mel_cfme.style.to_latex(hrules=True)\n",
    "l_tex = tex_tab.split('\\n')\n",
    "l_tex.insert(2, \"\"\"& Symmetric & \\multicolumn{2}{c}{Asymmetric} \\\\\\\\ \"\"\")\n",
    "l_tex.insert(2, \"\"\" \\\\cmidrule(lr){2-4} \"\"\")\n",
    "l_tex.insert(2, \"\"\" & \\\\multicolumn{3}{c}{Information Sharing} \\\\\\\\ \"\"\")\n",
    "l_tex.insert(-4, \"\"\" \\\\midrule \"\"\")\n",
    "l_tex[0] = '\\\\begin{tabular}{rccc}'\n",
    "print('\\n'.join(l_tex))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "97fed211",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"../Analysis/Output/tab2_choiceprob.tex\", \"w\") as f:\n",
    "    f.write('\\n'.join(l_tex))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "61b594c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_out = pd.DataFrame()\n",
    "bc15 = df15_out.loc[(df15_out['location']=='Melbourne')&(df15_out['brand'].notnull()), 'bn']\n",
    "bc17 = df17_out.loc[(df17_out['location']=='Melbourne')&(df17_out['brand'].notnull()), 'bn']\n",
    "np.random.seed(0)\n",
    "for i in range(50):\n",
    "    v15, n15 = np.unique(np.random.choice(bc15, bc15.shape, replace=True), return_counts=True)\n",
    "    n15 = n15 / np.sum(n15)\n",
    "    _d15 = dict(zip(v15, n15))\n",
    "    v17, n17 = np.unique(np.random.choice(bc17, bc17.shape, replace=True), return_counts=True)\n",
    "    n17 = n17 / np.sum(n17)\n",
    "    _d17 = dict(zip(v17, n17))\n",
    "    _l15 = [_d15[j] for j in ['bp', 'caltex', 'coles', 'woolworths', '7-11']] \n",
    "    _l17 = [_d17[j] for j in ['bp', 'caltex', 'coles', 'woolworths', '7-11']] \n",
    "    df_out[str(i)] = _l15 + _l17\n",
    "    df_out.to_csv(\"./Output/calibration_shares.csv\", index=False)\n",
    "    # with open(\"./Output/calibration_sensitivity/s{:02d}.txt\".format(i), \"a\") as f:\n",
    "    #     f.write(json.dumps(_l15+_l17))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "940dc431",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
